import codecs

# Extract the word list used for segmentation from the raw dictionary file.
#
# Reads 'words/现代汉语词典.txt' (decoded as GBK), keeps only the lines that
# contain the marker text 'word', and writes the extracted term of each such
# line to 'words/words.txt', one term per line. Each term is also echoed to
# stdout.

# Placeholder for an in-memory dictionary of terms; this script does not
# populate it.
dic = {}

# Context managers guarantee both files are closed (and the output flushed)
# even if reading or writing raises part-way through.
# NOTE(review): the output file is opened with the platform default encoding,
# exactly as before; consider passing an explicit encoding if the result must
# be portable across machines.
with codecs.open('words/现代汉语词典.txt', 'r', encoding='gbk') as f1, \
        open('words/words.txt', 'w') as f2:
    while True:
        try:
            line = f1.readline()
        except UnicodeDecodeError:
            # Undecodable bytes in the input: skip them and read again.
            # The earlier version retried at most twice and crashed on a
            # third consecutive failure; retrying via `continue` tolerates
            # any number of bad reads. Like the original retry, this assumes
            # a failed read still advances the underlying stream.
            continue
        if not line:
            # An empty string from readline() means end of file.
            break
        if 'word' not in line:
            continue
        # Drop the first 7 characters of the stripped line -- presumably a
        # fixed-width prefix carrying the 'word' marker; verify against the
        # input file format.
        term = line.strip()[7:]
        f2.write(term)
        f2.write('\n')
        print(term)

print("提取词典成功")
print('存储词典成功')